In [1]:
from autograd import grad
import autograd.numpy as np
import autograd.numpy.random as npr
import matplotlib.pyplot as plt
%matplotlib inline

xmin,xmax=0,10

Nonlinear regression with fixed basis functions

Given a set of basis functions $\phi_h(x)$, we represent our function class as $y_{pred}(x;\mathbf{w}) = \sum_h w_h \phi_h(x)$ and want to learn a weight vector $\mathbf{w}_{opt}$ such that $y_{pred}(x;\mathbf{w}_{opt}) \approx y_{true}(x)$.
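Because the basis functions are fixed, this is linear regression in the weights, so $\mathbf{w}_{opt}$ also has a closed-form least-squares solution; below we instead find it by gradient descent. A minimal sketch, assuming a hypothetical design matrix Phi with Phi[i,h] = $\phi_h(x_i)$ and targets y (analogous to the matrix Q constructed later):

def least_squares_weights(Phi, y, ridge=1e-8):
    # solve (Phi^T Phi + ridge*I) w = Phi^T y; the small ridge term guards against singularity
    A = np.dot(Phi.T, Phi) + ridge*np.eye(Phi.shape[1])
    b = np.dot(Phi.T, y)
    return np.linalg.solve(A, b)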


In [566]:
n_basis_fxns=15
basis_points=np.linspace(xmin,xmax,n_basis_fxns)
basis_fxns = np.empty(n_basis_fxns,dtype=object)

class RBF():
    '''Gaussian radial basis function centered at c, with width r.'''
    def __init__(self,center,r=1.0):
        self.c=center
        self.r=r

    def __call__(self,x):
        return np.exp(-(np.sum((x-self.c)**2) / (2*self.r**2)))

class StepFxn():
    '''Boxcar basis function: 1 within distance r of c, 0 elsewhere.'''
    def __init__(self,center,r=1.0):
        self.c=center
        self.r=r

    def __call__(self,x):
        return 1.0*(np.abs(x-self.c) < self.r)

basis_expansion_of_1 = np.zeros(n_basis_fxns)

for i,c in enumerate(basis_points):
    basis_fxns[i] = RBF(c,0.5)
    #basis_fxns[i] = StepFxn(c)   # alternative: piecewise-constant basis
    basis_expansion_of_1[i] = basis_fxns[i](1.0)

In [567]:
plt.plot(basis_expansion_of_1)


Out[567]:
[<matplotlib.lines.Line2D at 0x1229fba90>]

In [568]:
for fxn in basis_fxns:
    print(fxn(1.0))


0.135335283237
0.849365816568
0.692569324205
0.0733696513684
0.00100984423411
1.80583079846e-06
4.19550701874e-10
1.26641655491e-14
4.96653427578e-20
2.53055060031e-26
1.67517948323e-33
1.4407624984e-41
1.60993438774e-50
2.337268946e-60
4.40853133146e-71

In [569]:
basis_fxns


Out[569]:
array([<__main__.RBF instance at 0x12cef4b48>,
       <__main__.RBF instance at 0x1226af4d0>,
       <__main__.RBF instance at 0x1226af830>,
       <__main__.RBF instance at 0x1226afab8>,
       <__main__.RBF instance at 0x1226af6c8>,
       <__main__.RBF instance at 0x122443cf8>,
       <__main__.RBF instance at 0x1225b4440>,
       <__main__.RBF instance at 0x1225b4098>,
       <__main__.RBF instance at 0x1222f6440>,
       <__main__.RBF instance at 0x1226be830>,
       <__main__.RBF instance at 0x1226befc8>,
       <__main__.RBF instance at 0x122a3ba28>,
       <__main__.RBF instance at 0x122a3bb90>,
       <__main__.RBF instance at 0x122a3bbd8>,
       <__main__.RBF instance at 0x122a3bd40>], dtype=object)

In [570]:
def basis_fxn_expansion(x):
    # vector of all basis function values [phi_0(x), ..., phi_{H-1}(x)]
    return np.array([f(x) for f in basis_fxns])

In [571]:
plt.plot(basis_fxn_expansion(1.0))


Out[571]:
[<matplotlib.lines.Line2D at 0x122b55a90>]

In [572]:
def pred_y(x,weights):
    # y_pred(x; w) = sum_h w_h * phi_h(x)
    phis = basis_fxn_expansion(x)
    return np.dot(weights,phis)

In [573]:
x = np.linspace(xmin,xmax,500)
def f(x):
    return np.sin(2*x) - np.sin(3*x)
y =  f(x)

plt.plot(x,y)


Out[573]:
[<matplotlib.lines.Line2D at 0x122ea0e50>]

In [574]:
pred_y(5,np.ones(len(basis_fxns)))


Out[574]:
1.7548609130099042

In [575]:
Q = np.array([basis_fxn_expansion(i) for i in x])   # design matrix: Q[i,h] = phi_h(x[i])
plt.imshow(Q)


Out[575]:
<matplotlib.image.AxesImage at 0x123010cd0>

In [576]:
np.dot(Q.T,np.ones(len(x))).shape


Out[576]:
(15,)

In [577]:
def objective(weights):
    '''Sum of squared errors between predictions and targets -- minimize this.'''
    pred = np.dot(Q,weights)     # predictions at every x via the design matrix
    sse = np.sum((pred-y)**2)
    return sse

In [578]:
from time import time
t = time()
objective(np.ones(len(basis_fxns)))
print(time() - t)


0.000166893005371

In [579]:
grad(objective)(np.ones(len(basis_fxns)))


Out[579]:
array([ 103.9262841 ,  157.34852577,  160.02693102,  294.39181578,
        290.93787042,  122.9821193 ,  173.9500897 ,  287.15073685,
        248.10218691,  214.87686057,  166.10047866,  179.32050896,
        316.15932406,  248.52554628,   54.9405699 ])

In [580]:
def gradient_descent(objective,start_point,
                     n_iter=100,step_size=0.1):
    '''Plain gradient descent; returns the full trajectory of iterates.'''
    intermediates = np.zeros((n_iter,len(start_point)))
    gradient = grad(objective)
    w = start_point
    intermediates[0] = w
    for i in range(1,n_iter):
        w = w - gradient(w)*step_size
        intermediates[i] = w
    return intermediates

In [581]:
w = gradient_descent(objective,npr.randn(len(basis_fxns)),
                        n_iter=10000,step_size=0.005)
w_opt = w[-1]

In [582]:
plt.plot(w_opt)


Out[582]:
[<matplotlib.lines.Line2D at 0x12337e9d0>]

In [583]:
predicted = np.array([pred_y(i,w_opt) for i in x])
plt.plot(x,predicted,label='Model',linewidth=3,alpha=0.5)
plt.plot(x,y,label='True',linewidth=3,alpha=0.5)
for i,basis_fxn in enumerate(basis_fxns):
    if i == 0:
        plt.plot(x,[basis_fxn(x_) for x_ in x],c='grey',alpha=0.3,
                label='Basis functions')
        plt.plot(x,[basis_fxn(x_)*w_opt[i] for x_ in x],c='grey',
                 label='Weighted basis functions')
    else:
        plt.plot(x,[basis_fxn(x_)*w_opt[i] for x_ in x],c='grey')
        plt.plot(x,[basis_fxn(x_) for x_ in x],c='grey',alpha=0.3)

#plt.ylim(-3,2)
plt.legend(loc='best')


Out[583]:
<matplotlib.legend.Legend at 0x1234f7d90>

In [526]:
mse = np.sum((predicted - y)**2) / len(predicted)
mse


Out[526]:
0.38187366304040632

Adaptive basis functions

$$y(\mathbf{x}; \mathbf{w}) = \sum_h w_h^{(2)} \text{tanh}\left( \sum_i w_{hi}^{(1)}x_i + w_{h0}^{(1)} \right) + w_0^{(2)}$$
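The sum above is a one-hidden-layer network with tanh units: the inner weights $w_{hi}^{(1)}$ move and reshape the basis functions themselves rather than leaving them fixed. A minimal autograd sketch under that reading (hypothetical helper names; H hidden units, fit to the same x, y as above with a gradient-descent loop like the one already defined):

def init_params(H, scale=0.1):
    # w1, b1: hidden-layer weights and biases; w2, b2: output weights and bias
    return {'w1': scale*npr.randn(H), 'b1': scale*npr.randn(H),
            'w2': scale*npr.randn(H), 'b2': 0.0}

def nn_predict(params, x):
    hidden = np.tanh(x[:,None]*params['w1'] + params['b1'])   # (N, H) adaptive basis values
    return np.dot(hidden, params['w2']) + params['b2']

def nn_objective(params):
    return np.sum((nn_predict(params, x) - y)**2)

# autograd differentiates through the dict of parameters:
# nn_grad = grad(nn_objective)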

In [370]:
import gptools


In [447]:
# reproducing Figure 1 from: http://mlg.eng.cam.ac.uk/pub/pdf/Ras04.pdf
import numpy as np
import numpy.random as npr
from numpy.linalg import cholesky
from numpy.matlib import repmat

xs = np.linspace(-5,5,1000)
ns = len(xs)
keps=1e-9

m = lambda x: 0.25*x**2                      # prior mean function
def K_mat(xs_1,xs_2):
    # squared-exponential covariance between two sets of points
    diff_mat = repmat(xs_1,len(xs_2),1) - repmat(xs_2,len(xs_1),1).T
    return np.exp(-0.5*(diff_mat)**2)

fs = m(xs) + cholesky(K_mat(xs,xs)+keps*np.eye(ns)).dot(npr.randn(ns))   # one draw from the GP prior
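This is the standard recipe for sampling from a multivariate Gaussian: with Cholesky factor $L$ such that $K = LL^T$ and $z \sim \mathcal{N}(0, I)$, the draw $m + Lz$ is distributed as $\mathcal{N}(m, K)$. The small jitter keps added to the diagonal keeps the covariance numerically positive definite so the Cholesky factorization succeeds.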

In [459]:
npr.seed(0)
mean = m(xs)
choleskied = cholesky(K_mat(xs,xs)+keps*np.eye(ns))



fs = np.zeros((10000,len(xs)))

for i in range(len(fs)):
    fs[i] = mean + choleskied.dot(npr.randn(ns))
    if i < 5:
        plt.plot(xs,fs[i],c='blue',alpha=0.5)

plt.plot(xs,fs.mean(0),c='cyan')
# approximate 95% band: mean +/- 1.96 * standard deviation across draws
plt.fill_between(xs,fs.mean(0)+1.96*fs.std(0),fs.mean(0)-1.96*fs.std(0),alpha=0.2)


Out[459]:
<matplotlib.collections.PolyCollection at 0x114337b90>

In [453]:
plt.hist([xs[np.argmin(f)] for f in fs],bins=50);   # location of the minimum in each prior draw
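Each entry of the histogram is the location where one prior draw attains its minimum, so over many draws it approximates the distribution of the minimizer of a function sampled from this GP.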




In [449]:
%timeit np.argmin(choleskied.dot(npr.randn(ns)))


The slowest run took 4.97 times longer than the fastest. This could mean that an intermediate result is being cached 
1000 loops, best of 3: 162 µs per loop

In [445]:
np.argmin(choleskied.dot(npr.randn(ns,100)))

choleskied.dot(npr.randn(ns,100)).shape


Out[445]:
(1000, 100)

In [438]:
cholesky(K_mat(xs,xs)+keps*np.eye(ns)).shape


Out[438]:
(1000, 1000)

In [584]:
plt.imshow(K_mat(xs,xs),cmap='Blues',interpolation='none');
plt.colorbar()
plt.figure()

plt.imshow(cholesky(K_mat(xs,xs)+keps*np.eye(ns)),cmap='Blues',interpolation='none');
plt.colorbar()


Out[584]:
<matplotlib.colorbar.Colorbar instance at 0x12023cdd0>

In [418]:
# what's the minimum in this draw?
xs[np.argmin(fs)]


Out[418]:
-1.6666666666666665
